library(rethinking)

4.1 Why normal distributions are normal

pos <- replicate(1000, sum(runif(16, -1, 1)))
hist(pos)

plot(density(pos))

dens(pos)

# 4.2
prod(1 + runif(12, 0, 0.1))
## [1] 1.858046
growth <- replicate(10000, prod(1 + runif(12, 0, 0.1)))
dens(growth, norm.comp = TRUE)

big <- replicate(10000, prod(1 + runif(12, 0, 0.5)))
dens(big, norm.comp = TRUE)

small <- replicate(10000, prod(1 + runif(12, 0, 0.01)))
dens(small, norm.comp = TRUE)

# 4.5
log.big <- replicate(10000, log(prod(1 + runif(12, 0, 0.5))))
dens(log.big, norm.comp = TRUE)

4.2 A language for describing models

# 4.6
w <- 6
n <- 9
p_grid <- seq(from = 0, to = 1, length.out = 100)
posterior <- dbinom(w, n, p_grid) * dunif(p_grid, 0, 1)
posterior <- posterior/sum(posterior)

4.3 A Gaussian model of height

library(rethinking)
data("Howell1")
d <- Howell1
d
str(d)
## 'data.frame':    544 obs. of  4 variables:
##  $ height: num  152 140 137 157 145 ...
##  $ weight: num  47.8 36.5 31.9 53 41.3 ...
##  $ age   : num  63 63 65 41 51 35 32 27 19 54 ...
##  $ male  : int  1 0 0 1 0 1 0 1 0 1 ...
# d$height
d2 <- d[d$age >= 18,]
dens(d2$height)

curve(dnorm(x, 178, 20), from = 100, to = 250)

curve(dunif(x, 0, 50), from = -10, to = 60)

# 4.13
sample_mu <- rnorm(1e4, 178, 20)
sample_sigma <- runif(1e4, 0, 50)
prior_h <- rnorm(1e4, sample_mu, sample_sigma)
dens(prior_h)

4.3.3 Grid approximation of the posterior distribution

## R code 4.14
mu.list <- seq( from=140, to=160 , length.out=200 )
sigma.list <- seq( from=4 , to=9 , length.out=200 )
post <- expand.grid( mu=mu.list , sigma=sigma.list )
post$LL <- sapply( 1:nrow(post) , function(i) sum( dnorm(
                d2$height ,
                mean=post$mu[i] ,
                sd=post$sigma[i] ,
                log=TRUE ) ) )
post$prod <- post$LL + dnorm( post$mu , 178 , 20 , TRUE ) +
    dunif( post$sigma , 0 , 50 , TRUE )
post$prob <- exp( post$prod - max(post$prod) )
# 4.15
contour_xyz(post$mu, post$sigma, post$prob)

image_xyz(post$mu, post$sigma, post$prob)

4.3.4 Sampling from the posterior

# 4.17
sample.rows <- sample(1:nrow(post), size = 1e4, replace = TRUE, prob = post$prob)
sample.mu <- post$mu[sample.rows]
sample.sigma <- post$sigma[sample.rows]

plot(sample.mu, sample.sigma, cex=1, pch = 16, col = col.alpha(rangi2, 0.1))

dens(sample.mu)

dens(sample.sigma)

HPDI(sample.mu)
##    |0.89    0.89| 
## 153.8693 155.1759
HPDI(sample.sigma)
##    |0.89    0.89| 
## 7.291457 8.195980
d3 <- sample(d2$height, size = 20)
mu.list <- seq( from=150, to=170 , length.out=200 )
sigma.list <- seq( from=4 , to=20 , length.out=200 )
post2 <- expand.grid( mu=mu.list , sigma=sigma.list )
post2$LL <- sapply( 1:nrow(post2) , function(i)
    sum( dnorm( d3 , mean=post2$mu[i] , sd=post2$sigma[i] ,
    log=TRUE ) ) )
post2$prod <- post2$LL + dnorm( post2$mu , 178 , 20 , TRUE ) +
    dunif( post2$sigma , 0 , 50 , TRUE )
post2$prob <- exp( post2$prod - max(post2$prod) )
sample2.rows <- sample( 1:nrow(post2) , size=1e4 , replace=TRUE ,
    prob=post2$prob )
sample2.mu <- post2$mu[ sample2.rows ]
sample2.sigma <- post2$sigma[ sample2.rows ]
plot( sample2.mu , sample2.sigma , cex=0.5 ,
    col=col.alpha(rangi2,0.1) ,
    xlab="mu" , ylab="sigma" , pch=16 )

dens(sample2.sigma, norm.comp = TRUE)

4.3.5 Fitting the model with map

library(rethinking)
data("Howell1")
d <- Howell1
d2 <- d[d$age >= 18,]
flist <- alist(
  height ~ dnorm(mu, sigma),
  mu ~ dnorm(178, 20),
  sigma ~ dunif(0, 50)
)

m4.1 <- map(flist, data = d2)

precis(m4.1)
##         Mean StdDev   5.5%  94.5%
## mu    154.61   0.41 153.95 155.27
## sigma   7.73   0.29   7.27   8.20
m4.2 <- map(
  alist(
    height ~ dnorm(mu, sigma),
    mu ~ dnorm(178, 0.1),
    sigma ~ dunif(0, 50)
  ), 
  data = d2
)

precis(m4.2)
##         Mean StdDev   5.5%  94.5%
## mu    177.86   0.10 177.70 178.02
## sigma  24.52   0.93  23.03  26.00

4.3.6 Sampling from a map fit

vcov(m4.1)
##                 mu        sigma
## mu    0.1697397388 0.0002180563
## sigma 0.0002180563 0.0849059839
diag(vcov(m4.1))
##         mu      sigma 
## 0.16973974 0.08490598
cov2cor(vcov(m4.1))
##                mu       sigma
## mu    1.000000000 0.001816384
## sigma 0.001816384 1.000000000
library(rethinking)
post <- extract.samples(m4.1, n = 1e4)
head(post)
precis(post)
##         Mean StdDev  |0.89  0.89|
## mu    154.60   0.41 153.91 155.23
## sigma   7.73   0.29   7.24   8.18
plot(post, cex = 0.5, pch = 16, col = col.alpha(rangi2, 0.5))

4.4 Addig a predictor

plot(d2$height ~ d2$weight)

4.4.1 The linear model strategy

4.4.2 Fitting the model

# load data again since it's a long way back
library(rethinking)
data("Howell1")
d <- Howell1
d2 <- d[d$age >= 18,]

# fit model
m4.3 <- map(
  alist(
    height ~ dnorm(mu, sigma),
    mu <- a + b*weight,
    a ~ dnorm(156, 100),
    b ~ dnorm(0, 10),
    sigma ~ dunif(0, 50)
  ), 
  data = d2
)

4.4.3 Interpreting the model fit

precis(m4.3)
##         Mean StdDev   5.5%  94.5%
## a     113.90   1.91 110.85 116.94
## b       0.90   0.04   0.84   0.97
## sigma   5.07   0.19   4.77   5.38
precis(m4.3, corr = TRUE)
##         Mean StdDev   5.5%  94.5%     a     b sigma
## a     113.90   1.91 110.85 116.94  1.00 -0.99     0
## b       0.90   0.04   0.84   0.97 -0.99  1.00     0
## sigma   5.07   0.19   4.77   5.38  0.00  0.00     1
d2$weight.c <- d2$weight - mean(d2$weight)
m4.4 <- map(
  alist(
    height ~ dnorm(mu, sigma),
    mu <- a + b*weight.c,
    a ~ dnorm(178, 100),
    b ~ dnorm(0, 10),
    sigma ~ dunif(0, 50)
  ),
  data = d2
)

precis(m4.4, corr = TRUE)
##         Mean StdDev   5.5%  94.5% a b sigma
## a     154.60   0.27 154.17 155.03 1 0     0
## b       0.90   0.04   0.84   0.97 0 1     0
## sigma   5.07   0.19   4.77   5.38 0 0     1
plot(height ~ weight, data = d2)
abline(a = coef(m4.3)["a"], b = coef(m4.3)["b"])

post <- extract.samples(m4.3)
post[1:5,]
N <- 10
dN <- d2[1:N, ]
mN <- map(
  alist(
    height ~ dnorm(mu, sigma),
    mu <- a + b*weight,
    a ~ dnorm(178, 100),
    b ~ dnorm(0, 50),
    sigma ~ dunif(0, 50)
  ), data = dN
)
# extract 20 samples from the posterio
post <- extract.samples(mN, n = 20)

# display raw data and sample size
plot(dN$weight, dN$height,
     xlim = range(d2$weight), ylim = range(d2$height),
     col = rangi2, xlab="weight", ylab="height")
mtext(concat("N = ", N))

# plot the lines with transparency
for (i in 1:20) abline(a = post$a[i], b = post$b[i], col=col.alpha("black", 0.3))

mu_at_50 <- post$a + post$b * 50 
dens(mu_at_50, col = rangi2, lwd = 2, xlab = "mu|weight = 50")

HPDI(mu_at_50, prob = 0.89)
##    |0.89    0.89| 
## 154.4809 157.9070
mu <- link(m4.3)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(mu)
##  num [1:1000, 1:352] 157 158 157 157 157 ...
# define sequence of weights to compute predictions for
# these values will be on the horizontal axis
weight.seq <- seq( from=25 , to=70 , by=1 )

# use link to compute mu
# for each sample from posterior
# and for each weight in weight.seq
mu <- link( m4.3 , data=data.frame(weight=weight.seq) )
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(mu)
##  num [1:1000, 1:46] 137 137 136 137 138 ...
# use type="n" to hide raw data
plot(height ~ weight, d2, type = "n")

# loop over samples and plot each mu value
for(i in 1:100){
  points(weight.seq, mu[i,], pch = 16, col = col.alpha(rangi2, 0.1))
}

mu.mean <- apply(mu, 2, mean)
mu.HPDI <- apply(mu, 2, HPDI, prob = 0.89)
# plot raw data
# fading out points to make line and interval more visible
plot(height ~ weight, data = d2, col=col.alpha(rangi2, 0.5))

# plot the MAP line, aka the mean mu for each weight
lines(weight.seq, mu.mean)

# plot a shaded region for the 89% HPDI
shade(mu.HPDI, weight.seq)

sim.height <- sim(m4.3, data = list(weight = weight.seq))
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(sim.height)
##  num [1:1000, 1:46] 131 135 147 140 126 ...
height.PI <- apply(sim.height, 2, PI, prob=0.89)

# plot raw data
plot( height ~ weight , d2 , col=col.alpha(rangi2,0.5) )

# draw MAP line
lines( weight.seq , mu.mean )

# draw HPDI region for line
shade( mu.HPDI , weight.seq )

# draw PI region for simulated heights
shade( height.PI , weight.seq )

4.5 Polynomial regression

library(rethinking)
data("Howell1")
d <- Howell1
str(d)
## 'data.frame':    544 obs. of  4 variables:
##  $ height: num  152 140 137 157 145 ...
##  $ weight: num  47.8 36.5 31.9 53 41.3 ...
##  $ age   : num  63 63 65 41 51 35 32 27 19 54 ...
##  $ male  : int  1 0 0 1 0 1 0 1 0 1 ...
plot(d$weight, d$height)

d$weight.s <- (d$weight - mean(d$weight))/sd(d$weight)
plot(d$weight.s, d$height)

d$weight.s2 <- d$weight.s^2
m4.5 <- map(
    alist(
        height ~ dnorm( mu , sigma ) ,
        mu <- a + b1*weight.s + b2*weight.s2 ,
        a ~ dnorm( 178 , 100 ) ,
        b1 ~ dnorm( 0 , 10 ) ,
        b2 ~ dnorm( 0 , 10 ) ,
        sigma ~ dunif( 0 , 50 )
    ) ,
    data=d )
precis(m4.5)
##         Mean StdDev   5.5%  94.5%
## a     146.66   0.37 146.07 147.26
## b1     21.40   0.29  20.94  21.86
## b2     -8.42   0.28  -8.86  -7.97
## sigma   5.75   0.17   5.47   6.03
weight.seq <- seq(from = -2.2, to = 2, length.out = 30)
pred_dat <- list(weight.s = weight.seq, weight.s2 = weight.seq ^ 2)
mu <- link(m4.5, data = pred_dat)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mu.mean <- apply(mu, 2, mean)
mu.PI <- apply(mu, 2, PI, prob = 0.89)
sim.height <- sim(m4.5, data = pred_dat)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
height.PI <- apply(sim.height, 2, PI, prob = 0.89)

plot(height ~ weight.s, d, col = col.alpha(rangi2, 0.5))
lines(weight.seq, mu.mean)
shade(mu.PI, weight.seq)
shade(height.PI, weight.seq)

d$weight.s3 <- d$weight.s^3
m4.6 <- map(
    alist(
        height ~ dnorm( mu , sigma ) ,
        mu <- a + b1*weight.s + b2*weight.s2 + b3*weight.s3 ,
        a ~ dnorm( 178 , 100 ) ,
        b1 ~ dnorm( 0 , 10 ) ,
        b2 ~ dnorm( 0 , 10 ) ,
        b3 ~ dnorm( 0 , 10 ) ,
        sigma ~ dunif( 0 , 50 )
    ) ,
    data=d )
weight.seq <- seq(from = -2.2, to = 2, length.out = 30)
pred_dat <- list(weight.s = weight.seq, 
                 weight.s2 = weight.seq^2,
                 weight.s3 = weight.seq^3)
mu <- link(m4.6, data = pred_dat)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mu.mean <- apply(mu, 2, mean)
mu.PI <- apply(mu, 2, PI, prob = 0.89)
sim.height <- sim(m4.6, data = pred_dat)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
height.PI <- apply(sim.height, 2, PI, prob = 0.89)

plot(height ~ weight.s, d, col = col.alpha(rangi2, 0.5))
lines(weight.seq, mu.mean)
shade(mu.PI, weight.seq)
shade(height.PI, weight.seq)

plot(height ~ weight.s, d, col = col.alpha(rangi2, 0.5), xaxt = "n")
at <- c(-2, -1, 0, 1, 2)
labels <- at*sd(d$weight) + mean(d$weight)
axis(side = 1, at = at, labels = round(labels, 1))

4.7 Practice

Easy

4E1

The first line

4E2

Two

4E3

p(mu, sigma | y = Pi Normal(hi|mu, sigma)Normal(mu | 0, 10) Uniform( sigma | 0, 10))/…

4E4

The second line

4E5

Three: a, b, s

Medium

4M1

sample.mu <- rnorm(1e4, 0, 10)
sample.sigma <- runif(1e4, 0, 10)
sample.y <- rnorm(1e4, sample.mu, sample.sigma)

dens(sample.y)

4M2

flist <- alist(
  height ~ dnorm(mu, sigma),
  mu ~ dnorm(0, 10),
  sigma ~ dunif(0, 10)
)

4M3

flist <- alist(
  y ~ dnorm(mu, sigma),
  mu <- a + b * x,
  a ~ dnorm(0, 50),
  b ~ dnorm(0, 10),
  sigma ~ dunif(0, 50)
)

\[\begin{align} y_{i} &\sim \text{Normal}(\mu, \sigma) \\ \mu_{i} &= \alpha + \beta x_{i} \\ \alpha &\sim \text{Normal}(0, 50) \\ \beta &\sim \text{Uniform}(0, 10) \\ \sigma &\sim \text{Uniform}(0, 50) \end{align}\]


4M4

\[\begin{align} h_{i} &\sim \text{Normal}(\mu, \sigma) \\ \mu &= \alpha + \beta x_{i} \\ \alpha &\sim \text{Normal}(150, 50) \\ \beta &\sim \text{Normal}(4, 2) \\ \sigma &\sim \text{Uniform}(0, 50) \end{align}\]


4M5

\[\begin{align} h_{i} &\sim \text{Normal}(\mu, \sigma) \\ \mu &= \alpha + \beta x_{i} \\ \alpha &\sim \text{Normal}(120, 10) \\ \beta &\sim \text{Normal}(7, 1) \\ \sigma &\sim \text{Uniform}(0, 20) \end{align}\]


4M6

\[\begin{align} h_{i} &\sim \text{Normal}(\mu, \sigma) \\ \mu &= \alpha + \beta x_{i} \\ \alpha &\sim \text{Normal}(120, 10) \\ \beta &\sim \text{Normal}(7, 1) \\ \sigma &\sim \text{Uniform}(0, 8) \end{align}\]

Hard

4H1

Build the MAP model:

library(rethinking)
data("Howell1")

d <- Howell1
m4H1 <- map(
  alist(
    height ~ dnorm(mu, sigma),
    mu <- a + b * weight,
    a ~ dnorm(140, 30),
    b ~ dnorm(0, 10),
    sigma ~ dunif(0, 50)
  ),
  data = d
)

m4H1
## 
## Maximum a posteriori (MAP) model fit
## 
## Formula:
## height ~ dnorm(mu, sigma)
## mu <- a + b * weight
## a ~ dnorm(140, 30)
## b ~ dnorm(0, 10)
## sigma ~ dunif(0, 50)
## 
## MAP values:
##         a         b     sigma 
## 75.515505  1.762385  9.345925 
## 
## Log-likelihood: -1987.71

Now calculate the posterior distribution of heights for each weight value in our table:

new_weight <- c(46.95, 43.72, 64.78, 32.59, 54.63)
pred_height <- link(m4H1, data = data.frame(weight = new_weight))
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(pred_height)
##  num [1:1000, 1:5] 158 159 159 159 158 ...

Plot the distributions of pred_height for each weight.

plot(height ~ weight, d, type = "n")

for (i in 1:100) {
  points(new_weight, pred_height[i,], pch = 16, col = col.alpha(rangi2, 0.5))
}

Calculate the mean and HPDI for the predictions:

(pred_mean <- apply(pred_height, 2, mean))
## [1] 158.2769 152.5832 189.7070 132.9636 171.8150
(pred_interval <- apply(pred_height, 2, HPDI))
##           [,1]     [,2]     [,3]     [,4]     [,5]
## |0.89 157.5098 151.8171 188.4021 132.3603 170.8255
## 0.89| 159.1255 153.2720 191.1709 133.6093 172.9108

Put it into a data frame:

df <- data.frame(
  individual = 1:5,
  weight = new_weight,
  pred_height = pred_mean,
  lower = pred_interval[1,],
  upper = pred_interval[2,]
)

df

4H2

d <- Howell1
df <- d[d$age < 18,]
range(df$height)
## [1]  53.975 158.115
summary(df$height)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   53.98   89.13  111.12  108.32  127.72  158.12
dens(df$height)

HPDI(df$height)
##   |0.89   0.89| 
##  67.945 147.955
sd(df$height)
## [1] 25.74514

(a)

m4H2 <- map(
  alist(
    height ~ dnorm(mu, sigma),
    mu <- a + b * weight,
    a ~ dnorm(110, 30),
    b ~ dnorm(0, 10),
    sigma ~ dunif(0, 50)
  ),
  data = df
)

precis(m4H2, corr = TRUE)
##        Mean StdDev  5.5% 94.5%     a     b sigma
## a     58.34   1.40 56.11 60.58  1.00 -0.90  0.01
## b      2.72   0.07  2.61  2.82 -0.90  1.00 -0.01
## sigma  8.44   0.43  7.75  9.13  0.01 -0.01  1.00

For every 10 units increase in weight, the model predicts a 27 unit increase in height. The model also estimates that the standard deviation for under-18s is 8.44.

fit <- lm(height ~ weight, data = df)
summary(fit)
## 
## Call:
## lm(formula = height ~ weight, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.960  -5.083   1.640   6.048  19.142 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 58.23060    1.40454   41.46   <2e-16 ***
## weight       2.72009    0.06865   39.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.481 on 190 degrees of freedom
## Multiple R-squared:  0.892,  Adjusted R-squared:  0.8915 
## F-statistic:  1570 on 1 and 190 DF,  p-value: < 2.2e-16

(b)

# Determine the range of weights
range(df$weight)
## [1]  4.252425 44.735511
# Define sequence of weight to predict on
weight.seq <- seq(4, 46, by = 2)

# compute mu for each sample from the posterior and for each weight we want to predict on
mu <- link(m4H2, data = data.frame(weight = weight.seq))
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(mu)
##  num [1:1000, 1:22] 67.9 68.4 68.2 70.3 70.1 ...

Summarise the distribution of mu

mu.mean <- apply(mu, 2, mean)
mu.HPDI <- apply(mu, 2, HPDI, prob = 0.89)

Calculate the prediction interval

sim.height <- sim(m4H2, data = list(weight = weight.seq))
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
height.PI <- apply(sim.height, 2, PI)
# Plot raw data
plot(height ~ weight, df, col = col.alpha(rangi2, 0.5))

# Add map line
lines(weight.seq, mu.mean)

# Add HPDI interval for the line
shade(mu.HPDI, weight.seq)

# Add PI interval for the simulated heights
shade(height.PI, weight.seq)

(c)

I’m concerned that:

  • The relationship between height and weight does not appear to be linear in the raw data

4H3

(a)

Start with model definition:

\[\begin{align} h_{i} &\sim \text{Normal}(\mu_{i}, \sigma) \\ \mu_{i} &= \alpha + \beta \text{log}(w_{i}) \\ \alpha &\sim \text{Normal}(178, 100) \\ \beta &\sim \text{Normal}(0, 100) \\ \sigma &\sim \text{Uniform}(0, 50) \end{align}\]

d <- Howell1

m4H3 <- map(
  alist(
    height ~ dnorm(mu, sigma),
    mu <- a + b * log(weight),
    a ~ dnorm(178, 100),
    b ~ dnorm(0, 100),
    sigma ~ dunif(0, 50)
  ),
  data = d
)

precis(m4H3, corr = TRUE)
##         Mean StdDev   5.5%  94.5%     a     b sigma
## a     -23.79   1.34 -25.92 -21.65  1.00 -0.99     0
## b      47.08   0.38  46.46  47.69 -0.99  1.00     0
## sigma   5.13   0.16   4.89   5.38  0.00  0.00     1

Interpretation:

  • the value of a is -24 cm, meaning that a person with log(0) weight in kg will be -24 cm tall. This is silly, of course. For one reason, log(0) is -Inf!
  • the value of b is 47 cm/log(kg), meaning that for every increase of one log unit of weight, the height increases by 47 cm.
  • the estimate of \(\sigma\) means that, in the model, the estimate of standard deviation of height is 5.1 cm.

(b)

plot(height ~ weight, data = Howell1, col = col.alpha(rangi2, 0.4))

# Estimate the MAP regression line and 89% HPDI for the mean
weight.seq <- seq(
  from = min(d$weight),
  to = max(d$weight),
  by = 2
)

mu <- link(m4H3, data = data.frame(weight = weight.seq))
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
str(mu)
##  num [1:1000, 1:30] 44.8 44.3 43.3 43.8 44.5 ...

Summarise the distribution of \(\mu\)

mu.mean <- apply(mu, 2, mean)
mu.HPDI <- apply(mu, 2, HPDI)

Calculate the prediction interval

sim.height <- sim(m4H3, data = list(weight = weight.seq))
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
height.PI <- apply(sim.height, 2, PI)

Plot it

# Plot original data
plot(height ~ weight, data = Howell1, col = col.alpha(rangi2, 0.4))

# Add regression line for mu
lines(weight.seq, mu.mean)

# Add HPDI interval for the line
shade(mu.HPDI, weight.seq)

# Add PI interval for the predicted heights
shade(height.PI, weight.seq)